Creating a DataFrame of all test files


In [3]:
import pandas as pd

In [4]:
import numpy as np

In [5]:
import sys
sys.path.append('..')

In [6]:
from dis_ds import parsing

In [7]:
all_files = !ls ../test_data

In [8]:
full_path_all_files = ['../test_data/' + a for a in all_files]
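An equivalent, shell-independent way to build the same list (just a sketch; the `!ls` approach above works fine here) would use the glob module:

    import glob
    # every file under ../test_data, already prefixed with its directory
    full_path_all_files = sorted(glob.glob('../test_data/*'))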

In [9]:
all_files_df = parsing.parse_file_list(full_path_all_files)

In [14]:
all_files_df[:1000]


Out[14]:
bakerloo central circle district hammersmith-city jubilee metropolitan northern piccadilly victoria waterloo-city
2015-02-24 11:51:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:52:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:53:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:54:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:55:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:56:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:01:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:02:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:03:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:04:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:05:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:06:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:07:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:08:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:09:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:10:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:11:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:12:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:13:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:14:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:15:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:16:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:17:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:18:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:19:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:20:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:21:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:22:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:23:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:24:14 10 10 10 10 10 10 10 10 10 10 10
... ... ... ... ... ... ... ... ... ... ... ...
2015-02-25 04:05:15 10 10 10 10 20 20 20 20 10 20 20
2015-02-25 04:06:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:07:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:08:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:09:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:10:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:11:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:12:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:13:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:14:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:15:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:16:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:17:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:18:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:19:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:20:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:21:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:22:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:23:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:24:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:25:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:26:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:27:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:28:15 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:29:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:30:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:31:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:32:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:33:14 10 10 10 10 10 10 10 10 10 10 20
2015-02-25 04:34:14 10 10 10 10 10 10 10 10 10 10 20

1000 rows × 11 columns
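The cell values are the TfL status-severity codes captured from the line-status feed: 10 corresponds to "Good Service", while the 20s that appear in the early-morning rows mark a line reported as closed (the overnight shutdown).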


In [17]:
import xlsxwriter


writer = pd.ExcelWriter('tfldata.xlsx', engine='xlsxwriter')


all_files_df.to_excel(writer, sheet_name="Sheet 1")
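With the xlsxwriter engine nothing is written to disk until the writer is saved, so this cell presumably needs a final call:

    writer.save()  # flush the workbook to tfldata.xlsx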

In [11]:
all_files_df.save(all_files_df)


/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py:1000: FutureWarning: save is deprecated, use to_pickle
  warnings.warn("save is deprecated, use to_pickle", FutureWarning)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-33722759911e> in <module>()
----> 1 all_files_df.save(all_files_df)

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in save(self, path)
    999         from pandas.io.pickle import to_pickle
   1000         warnings.warn("save is deprecated, use to_pickle", FutureWarning)
-> 1001         return to_pickle(self, path)
   1002 
   1003     def load(self, path):  # TODO remove in 0.14

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/pickle.py in to_pickle(obj, path)
     11         File path
     12     """
---> 13     with open(path, 'wb') as f:
     14         pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
     15 

TypeError: invalid file:                      bakerloo  central  circle  district  hammersmith-city  \
2015-02-24 11:51:45        10       10      10        10                10   
2015-02-24 11:52:44        10       10      10        10                10   
...                       ...      ...     ...       ...               ...   
2015-03-03 09:57:14        10       10      10        10                10   
2015-03-03 09:58:13        10       10      10        10                10   

[full repr of the DataFrame, echoed into the error message, elided]
[9944 rows x 11 columns]
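`save` is the deprecated pickling API and, in any case, expects a file path rather than the DataFrame itself, hence the TypeError above. A minimal fix (the file name here is only an example):

    all_files_df.to_pickle('all_files_df.pkl')  # write the frame to a pickle on disk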

Importing Test Files to PostgreSQL


In [13]:
%save?

In [16]:
from sqlalchemy import create_engine

In [17]:
engine = create_engine('postgres://pmgigyko:Mb7sR3WMZSNPYjm4FTvS0WRDhtqUgcam@pellefant.db.elephantsql.com:5432/pmgigyko')

In [18]:
all_files_df.to_sql('disruptions_test1',engine)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-111b9adef532> in <module>()
----> 1 all_files_df.to_sql('disruptions_test1',engine)

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/core/generic.py in to_sql(self, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
    980             self, name, con, flavor=flavor, schema=schema, if_exists=if_exists,
    981             index=index, index_label=index_label, chunksize=chunksize,
--> 982             dtype=dtype)
    983 
    984     def to_pickle(self, path):

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
    547     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
    548                       index_label=index_label, schema=schema,
--> 549                       chunksize=chunksize, dtype=dtype)
    550 
    551 

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1185                          if_exists=if_exists, index_label=index_label,
   1186                          schema=schema, dtype=dtype)
-> 1187         table.create()
   1188         table.insert(chunksize)
   1189         # check for potentially case sensitivity issues (GH7815)

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/io/sql.py in create(self)
    648         if self.exists():
    649             if self.if_exists == 'fail':
--> 650                 raise ValueError("Table '%s' already exists." % self.name)
    651             elif self.if_exists == 'replace':
    652                 self.pd_sql.drop_table(self.name, self.schema)

ValueError: Table 'disruptions_test1' already exists.
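`to_sql` defaults to `if_exists='fail'`, and a table with this name is left over from an earlier run. Assuming the old table should simply be overwritten (use 'append' to add to it instead), the call becomes:

    all_files_df.to_sql('disruptions_test1', engine, if_exists='replace')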

Creating a DataFrame of all files


In [22]:
df_feb = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-02')

In [24]:
df_march=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-03')

In [25]:
df_april=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-04')

In [31]:
df_may=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')

In [32]:
df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()


---------------------------------------------------------------------------
S3ResponseError                           Traceback (most recent call last)
<ipython-input-32-f771dfcf1dae> in <module>()
----> 1 df_may_full= parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05').to_string()

/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
     74     b = c.get_bucket('pivotal-london-dis')
     75     key_list = b.list(prefix=file_prefix)
---> 76     return parse_file_list(key_list)
     77 

/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
     65 
     66 def parse_file_list(file_list):
---> 67     result_list = [parse_file(file) for file in file_list]
     68     result_df = pd.concat(result_list)
     69     return result_df

/Users/pivotal/dis/dis_ds/parsing.py in <listcomp>(.0)
     65 
     66 def parse_file_list(file_list):
---> 67     result_list = [parse_file(file) for file in file_list]
     68     result_df = pd.concat(result_list)
     69     return result_df

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucketlistresultset.py in bucket_lister(bucket, prefix, delimiter, marker, headers, encoding_type)
     32         rs = bucket.get_all_keys(prefix=prefix, marker=marker,
     33                                  delimiter=delimiter, headers=headers,
---> 34                                  encoding_type=encoding_type)
     35         for k in rs:
     36             yield k

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in get_all_keys(self, headers, **params)
    470         return self._get_all([('Contents', self.key_class),
    471                               ('CommonPrefixes', Prefix)],
--> 472                              '', headers, **params)
    473 
    474     def get_all_versions(self, headers=None, **params):

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/boto/s3/bucket.py in _get_all(self, element_map, initial_query_string, headers, **params)
    408         else:
    409             raise self.connection.provider.storage_response_error(
--> 410                 response.status, response.reason, body)
    411 
    412     def validate_kwarg_names(self, kwargs, names):

S3ResponseError: S3ResponseError: 403 Forbidden
<?xml version="1.0" encoding="UTF-8"?>
<Error><Code>RequestTimeTooSkewed</Code><Message>The difference between the request time and the current time is too large.</Message><RequestTime>Sat, 17 Oct 2015 04:02:16 GMT</RequestTime><ServerTime>2015-10-17T06:04:18Z</ServerTime><MaxAllowedSkewMilliseconds>900000</MaxAllowedSkewMilliseconds><RequestId>D656B3EB85DB0AC7</RequestId><HostId>h7a9QifJel2RsnFl4/Whqi/Muc9LqeIfOHj0fotpfE0WnwSNobbCramJkBGl+DvudoFLLA48ZU0=</HostId></Error>
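The 403 here has nothing to do with bucket permissions: RequestTimeTooSkewed means the local clock (04:02 GMT in the request) was more than the allowed 15 minutes away from the S3 server time (06:04 GMT), so the signed request was rejected. Syncing the machine's clock and re-running the cell is the fix; the May data used below comes from the earlier, successful In [31] call.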

In [27]:
frames = (df_feb,df_march,df_april,df_may)

In [28]:
total_df = pd.concat(frames)

In [29]:
total_df


Out[29]:
bakerloo central circle district hammersmith-city jubilee metropolitan northern piccadilly victoria waterloo-city
2015-02-24 11:51:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:52:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:53:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:54:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:55:44 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 11:56:45 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:01:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:02:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:03:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:04:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:05:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:06:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:07:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:08:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:09:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:10:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:11:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:12:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:13:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:14:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:15:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:16:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:17:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:18:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:19:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:20:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:21:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:22:15 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:23:14 10 10 10 10 10 10 10 10 10 10 10
2015-02-24 12:24:14 10 10 10 10 10 10 10 10 10 10 10
... ... ... ... ... ... ... ... ... ... ... ...
2015-05-31 15:47:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 16:04:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 16:21:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 16:37:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 16:54:36 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 17:11:14 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 17:27:54 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 17:44:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 18:01:18 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 18:17:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 18:34:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 18:51:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 19:07:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 19:24:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 19:41:16 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 19:57:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 20:14:45 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 20:31:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 20:47:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 21:04:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 21:21:26 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 21:37:55 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 21:54:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 22:11:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 22:27:54 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 22:44:35 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 23:01:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 23:17:56 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 23:34:36 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2015-05-31 23:51:15 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

132702 rows × 11 columns


In [ ]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-05')

In [ ]:
s3_files_df=parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-')

In [ ]:
s3_files_df

In [19]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-24_07:16:27')


Out[19]:
bakerloo central circle district hammersmith-city jubilee metropolitan northern piccadilly victoria waterloo-city
2015-09-24 07:16:27 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

In [28]:
parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-be5970fda21c> in <module>()
----> 1 parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-09-12_07:16:23')

/Users/pivotal/dis/dis_ds/parsing.py in parse_s3_files(file_prefix)
     74     b = c.get_bucket('pivotal-london-dis')
     75     key_list = b.list(prefix=file_prefix)
---> 76     return parse_file_list(key_list)
     77 

/Users/pivotal/dis/dis_ds/parsing.py in parse_file_list(file_list)
     66 def parse_file_list(file_list):
     67     result_list = [parse_file(file) for file in file_list]
---> 68     result_df = pd.concat(result_list)
     69     return result_df
     70 

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
    752                        keys=keys, levels=levels, names=names,
    753                        verify_integrity=verify_integrity,
--> 754                        copy=copy)
    755     return op.get_result()
    756 

/Users/pivotal/anaconda/envs/python3.4/lib/python3.4/site-packages/pandas/tools/merge.py in __init__(self, objs, axis, join, join_axes, keys, levels, names, ignore_index, verify_integrity, copy)
    797 
    798         if len(objs) == 0:
--> 799             raise ValueError('All objects passed were None')
    800 
    801         # consolidate data & figure out what our result ndim is going to be

ValueError: All objects passed were None
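The prefix `tfl_api_line_mode_status_tube_2015-09-12_07:16:23` matches no key in the bucket (or only files that parse to None), so `parse_file_list` ends up handing `pd.concat` nothing to combine. A more defensive variant (a sketch, not the code actually in dis_ds/parsing.py) would drop None results and return an empty frame instead of raising:

    def parse_file_list_safe(file_list):
        """Like parse_file_list, but tolerant of empty listings and unparsable files."""
        results = [parse_file(f) for f in file_list]
        results = [r for r in results if r is not None]
        if not results:
            return pd.DataFrame()  # nothing parsed: return an empty frame
        return pd.concat(results)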

In [46]:
pd.s3_files_df.describe()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-46-0a7a1fef5d51> in <module>()
----> 1 pd.s3_files_df.describe()

AttributeError: 'module' object has no attribute 's3_files_df'
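pandas has no attribute `s3_files_df`, and the frame was never bound locally either, because the `parse_s3_files` call above did not complete. Once that call succeeds, `describe` should be invoked on the local variable (same prefix as the earlier cell, by way of example):

    s3_files_df = parsing.parse_s3_files('tfl_api_line_mode_status_tube_2015-')
    s3_files_df.describe()  # per-line summary statistics of the severity codes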

In [ ]:
s3_files_df
